##########
#Quality of Legislation and Compliance: A Natural Language Processing Approach
#Moritz Osnabruegge, Matia Vannoni
#This script produces Figure 3 and Tables A5-A11
#########



library(dplyr)
library(ggplot2)
library(lmtest)
library(marginaleffects)
library(sandwich)
library(stargazer)


# Read the analysis data from "data.csv" in the working directory (UTF-8).
data <- read.csv(file = "data.csv", fileEncoding = "UTF-8")



######################  
###Table A5
######################

# Four binomial GLMs of `implementation` on the two text measures
# (syntactic_complexity, vagueness), estimated on the full data set.
# Columns differ only in the set of additional covariates.
# NOTE(review): `marginaleffects()` appears to have been renamed `slopes()`
# in later releases of the marginaleffects package -- confirm that the
# installed version still provides it and that `summary()` returns the
# `term`, `estimate`, `conf.low` and `conf.high` columns used for Figure 3.

# Column 1: text measures only
m1 <- glm(
  implementation ~ syntactic_complexity + vagueness,
  family = "binomial",
  data = data
)
summary(m1)

# Average marginal effects for column 1; the summary is reused for Figure 3a
mfx1 <- marginaleffects(m1)
s_mfx1 <- summary(mfx1)
s_mfx1

# Column 2: adds covariates, without the word count
m2 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m2)

# Column 3: column 2 plus nr_words
m3 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m3)

# Average marginal effects for column 3; the summary is reused for Figure 3b
mfx3 <- marginaleffects(m3)
s_mfx3 <- summary(mfx3)
s_mfx3

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m4 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio,
  family = "binomial",
  data = data
)
summary(m4)


# Write the LaTeX table to Table_A5.tex
stargazer(m1, m2, m3, m4, align = TRUE, out = "Table_A5.tex")



######################
###Table A6
######################

# Same four specifications as Table A5, but using the `_with` variants of
# the text measures (syntactic_complexity_with, vagueness_with) and of the
# word count (nr_words_with).

# Column 1: text measures only
m5 <- glm(
  implementation ~ syntactic_complexity_with + vagueness_with,
  family = "binomial",
  data = data
)
summary(m5)

# Column 2: adds covariates, without the word count
m6 <- glm(
  implementation ~ syntactic_complexity_with + vagueness_with +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m6)

# Column 3: column 2 plus nr_words_with
m7 <- glm(
  implementation ~ syntactic_complexity_with + vagueness_with +
    efficient_score + interest + typodir +
    nr_words_with +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m7)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m8 <- glm(
  implementation ~ syntactic_complexity_with + vagueness_with +
    efficient_score + interest + typodir +
    nr_words_with +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio,
  family = "binomial",
  data = data
)
summary(m8)

# Write the LaTeX table to Table_A6.tex
stargazer(m5, m6, m7, m8, align = TRUE, out = "Table_A6.tex")



#################
###Figure 3
#################

# Figure 3: average marginal effects of the two text measures.
# Panel a is drawn from s_mfx1 (model m1), panel b from s_mfx3 (model m3).
# The helpers below do not modify s_mfx1 / s_mfx3, so this section can be
# re-run on its own without the `term` column having been renamed already.

# Keep the complexity and vagueness rows of a marginal-effects summary and
# return them with the term stored as a labelled factor called `Variable`.
prepare_ame <- function(ame_summary) {
  out <- ame_summary %>% rename("Variable" = "term")
  out <- subset(out, Variable %in% c("syntactic_complexity", "vagueness"))
  # The level order (Vagueness first, Complexity second) fixes the order in
  # which the two estimates are laid out on the discrete axis.
  out$Variable <- factor(
    out$Variable,
    levels = c("vagueness", "syntactic_complexity"),
    labels = c("Vagueness", "Complexity")
  )
  out
}

# Point-range plot of estimate with [conf.low, conf.high] per Variable.
# Both panels share the same axis limits and breaks so they are comparable.
plot_ame <- function(ame) {
  ggplot(data = ame) +
    geom_pointrange(
      mapping = aes(
        y = estimate, x = Variable,
        ymin = conf.low, ymax = conf.high
      )
    ) +
    coord_flip(ylim = c(-0.135, 0.05)) +
    scale_y_continuous(
      name = "Average Marginal Effect",
      breaks = seq(-0.13, 0.05, by = 0.02)
    ) +
    scale_x_discrete(name = "") +
    geom_hline(yintercept = 0, linetype = "dotted") +
    theme_bw() +
    theme(
      axis.line = element_line(color = "black"),
      axis.text.x = element_text(size = 20, colour = "black"),
      axis.text.y = element_text(size = 20, colour = "black", hjust = 0),
      axis.title.y = element_text(size = 20),
      axis.title.x = element_text(size = 20)
    )
}

# Write a plot to a 9 x 4.5 inch PDF. The device is closed via on.exit()
# so it does not stay open if printing the plot fails.
save_panel <- function(plot, file) {
  pdf(file, width = 9, height = 4.5)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(file)
}

# Panel a
a1 <- prepare_ame(s_mfx1)
p <- plot_ame(a1)
save_panel(p, "figure_3a.pdf")

# Panel b
a3 <- prepare_ame(s_mfx3)
p <- plot_ame(a3)
save_panel(p, "figure_3b.pdf")



#################
###Table A7
#################

# Binomial GLMs in which both text measures are interacted with msdis_sum.
# In an R formula `a * b` expands to a + b + a:b, so msdis_sum enters as a
# main effect and in two interaction terms.

# Column 1: text measures, msdis_sum and their interactions only
m9 <- glm(
  implementation ~ syntactic_complexity * msdis_sum + vagueness * msdis_sum,
  family = "binomial",
  data = data
)
summary(m9)

# Column 2: adds covariates, without the word count
m10 <- glm(
  implementation ~ syntactic_complexity * msdis_sum + vagueness * msdis_sum +
    efficient_score + interest + typodir +
    amending + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m10)

# Column 3: column 2 plus nr_words
m11 <- glm(
  implementation ~ syntactic_complexity * msdis_sum + vagueness * msdis_sum +
    efficient_score + interest + typodir +
    nr_words +
    amending + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
summary(m11)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m12 <- glm(
  implementation ~ syntactic_complexity * msdis_sum + vagueness * msdis_sum +
    efficient_score + interest + typodir +
    nr_words +
    amending + couconf_sum + govchange +
    delegationratio,
  family = "binomial",
  data = data
)
summary(m12)


# Write the LaTeX table to Table_A7.tex
stargazer(m9, m10, m11, m12, align = TRUE, out = "Table_A7.tex")



#################
###Table A8
#################

# Re-estimate the Table A5 specifications on the subset of rows whose
# `celex` identifier is one of the ten values listed below.
data_c <- subset(
  data,
  celex %in% c(
    "32000L0078", "32001L0005", "32001L0016", "32001L0029", "32001L0055",
    "32002L0007", "32000L0055", "32002L0021", "32000L0026", "32001L0110"
  )
)


# Column 1: text measures only
m13 <- glm(
  implementation ~ syntactic_complexity + vagueness,
  family = "binomial",
  data = data_c
)
summary(m13)

# Column 2: adds covariates, without the word count
m14 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data_c
)
summary(m14)

# Column 3: column 2 plus nr_words
m15 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data_c
)
summary(m15)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m16 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio,
  family = "binomial",
  data = data_c
)
summary(m16)


# Write the LaTeX table to Table_A8.tex
stargazer(m13, m14, m15, m16, align = TRUE, out = "Table_A8.tex")



#################
###Table A9
#################

# Table A5 specifications with factor(year_adoption) appended as the last
# term, i.e. one indicator per value of year_adoption.

# Column 1: text measures and adoption-year indicators only
m17 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    factor(year_adoption),
  family = "binomial",
  data = data
)
summary(m17)

# Column 2: adds covariates, without the word count
m18 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms +
    factor(year_adoption),
  family = "binomial",
  data = data
)
summary(m18)

# Column 3: column 2 plus nr_words
m19 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms +
    factor(year_adoption),
  family = "binomial",
  data = data
)
summary(m19)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m20 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio +
    factor(year_adoption),
  family = "binomial",
  data = data
)
summary(m20)


# Write the LaTeX table to Table_A9.tex
stargazer(m17, m18, m19, m20, align = TRUE, out = "Table_A9.tex")



#################
###Table A10
#################

# Table A5 specifications with EP_cycle and prop_support added as the last
# two terms of columns 2-4. Column 1 contains only the two text measures.

# Column 1: text measures only
m21 <- glm(
  implementation ~ syntactic_complexity + vagueness,
  family = "binomial",
  data = data
)
summary(m21)

# Column 2: covariates without the word count, plus EP_cycle and prop_support
m22 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms +
    EP_cycle + prop_support,
  family = "binomial",
  data = data
)
summary(m22)

# Column 3: column 2 plus nr_words
m23 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms +
    EP_cycle + prop_support,
  family = "binomial",
  data = data
)
summary(m23)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m24 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio +
    EP_cycle + prop_support,
  family = "binomial",
  data = data
)
summary(m24)


# Write the LaTeX table to Table_A10.tex
stargazer(m21, m22, m23, m24, align = TRUE, out = "Table_A10.tex")



#################
###Table A11
#################

# Table A5 specifications re-tested with a covariance matrix clustered on
# `celex` (sandwich::vcovCL), passed to lmtest::coeftest. The resulting
# coeftest objects, not the glm fits, are what goes into the table.
# NOTE(review): the cluster ids are taken from the full `data`; if glm()
# drops rows with missing values this relies on vcovCL() re-aligning the
# cluster vector with the estimation sample -- confirm.

# Column 1: text measures only
m25 <- glm(
  implementation ~ syntactic_complexity + vagueness,
  family = "binomial",
  data = data
)
clustered_se <- vcovCL(m25, cluster = data$celex)
m25_r <- coeftest(m25, vcov. = clustered_se)

# Column 2: adds covariates, without the word count
m26 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
clustered_se <- vcovCL(m26, cluster = data$celex)
m26_r <- coeftest(m26, vcov. = clustered_se)

# Column 3: column 2 plus nr_words
m27 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    discretion.index_ms,
  family = "binomial",
  data = data
)
clustered_se <- vcovCL(m27, cluster = data$celex)
m27_r <- coeftest(m27, vcov. = clustered_se)

# Column 4: as column 3, with delegationratio replacing discretion.index_ms
m28 <- glm(
  implementation ~ syntactic_complexity + vagueness +
    efficient_score + interest + typodir +
    nr_words +
    amending + msdis_sum + couconf_sum + govchange +
    delegationratio,
  family = "binomial",
  data = data
)
clustered_se <- vcovCL(m28, cluster = data$celex)
m28_r <- coeftest(m28, vcov. = clustered_se)


# Write the LaTeX table to Table_A11.tex
stargazer(m25_r, m26_r, m27_r, m28_r, align = TRUE, out = "Table_A11.tex")